******************************************************************************
************* INPUT THE TARGA INFO 				   ***************************
******************************************************************************
* Start from an empty session and read the raw TARGA vehicle file.
clear all
import delimited "$root/Data/Original/TARGA_Auto.txt", bindquote(nobind) clear

* Only rows with fahrzeugart equal to 10 are used; every other row
* (including rows where fahrzeugart is missing) is discarded.
drop if fahrzeugart != 10

* Short names used in the rest of the do-file: the fuel code and the
* type-approval number (tgcode), which later serves as the link key.
rename (bauarttreibstoff typengenehmigungsnummer) (treibstoffcode tgcode)

* Make and model joined into one string, separated by a single blank.
generate marke_typ = marke + " " + typ

* Retain just the variables needed downstream.
keep tgcode getriebe1 marke_typ treibstoffcode leistungkw gesamtgewicht


* The consumption ("Verbrauch") file is read into a frame of its own, so the
* vehicle data already loaded in the default frame stay untouched.
frame create verbrauch
frame change verbrauch

import delimited "$root/Data/Original/TARGA_verbrauch.txt", bindquote(nobind) clear

* Keep the link key, the descriptive codes, the consumption figures and the
* three dimension variables needed for the size measures below.
keep tgcode treibstoffcode getriebe energieeffizienzkategorie ///
     et_co2 et_verbrauch el_verbrauch                          ///
     länge_von breite_von höhe_von

* Footprint (length x width) and height, each rescaled by the constants below.
* NOTE(review): the divisors suggest millimetres converted to m2 and m; confirm
* the units of the raw file.
generate car_size   = länge_von*breite_von/1000000
generate car_height = höhe_von/1000

* The raw dimensions (the only variables ending in _von) are no longer needed.
drop *_von

* One row per tgcode: first non-missing value for the two code variables,
* mean for every numeric measure. Variables not listed here (getriebe) are
* dropped by collapse.
collapse (firstnm) treibstoffcode energieeffizienzkategorie ///
         (mean) et_co2 et_verbrauch el_verbrauch car_size car_height, ///
         by(tgcode)


* Return to the vehicle data and attach the per-tgcode consumption figures.
frame change default

* 1:1 link on tgcode: frlink stops with an error unless tgcode identifies at
* most one row on each side. The link variable it creates is called verbrauch.
frlink 1:1 tgcode , frame(verbrauch)

* Pull the variables of the linked frame into the current frame.
frget _all, from(verbrauch)

* The link variable is only needed for frget.
drop verbrauch

* Spelled out in full so the command does not depend on variable-name
* abbreviation being switched on.
* NOTE(review): a treibstoffcode exists in both frames; confirm that discarding
* the fuel code entirely at this point is intended.
drop treibstoffcode

* Standardize the marke_typ variable.
*
* Every rule below is a plain substring replacement applied to all rows, and
* the rules run strictly in the order written. Several rules rely on the
* output of earlier ones, so do not reorder them without checking the effect.
* Many rules only append a blank after a model name; the resulting trailing or
* double blanks are left in place (trim is applied only once, at the start).
replace marke_typ=lower(marke_typ)
replace marke_typ=trim(marke_typ)

* --- Make names ---------------------------------------------------------
replace marke_typ=subinstr(marke_typ,"volkswagen","vw",.)
replace marke_typ=subinstr(marke_typ,"wv","vw",.)
replace marke_typ=subinstr(marke_typ,"hunday","hyundai",.)
* Second pass of the wv rule: one pass can itself leave a new wv behind
* (wwv becomes wvw), so this repeat is not guaranteed to be a no-op.
replace marke_typ=subinstr(marke_typ,"wv","vw",.)
replace marke_typ=subinstr(marke_typ,"mercedes benz","mercedes",.)
replace marke_typ=subinstr(marke_typ,"mercedes-benz","mercedes",.)
replace marke_typ=subinstr(marke_typ,"mercedes-amg","mercedes amg",.)
replace marke_typ=subinstr(marke_typ,"mc laren","mclaren",.)
replace marke_typ=subinstr(marke_typ,"alfa romeo","alfa-romeo",.)
replace marke_typ=subinstr(marke_typ,"alfra-romeo","alfa-romeo",.)
replace marke_typ=subinstr(marke_typ,"bmw alpina","alpina",.)
replace marke_typ=subinstr(marke_typ,"bmw-alpina","alpina",.)
replace marke_typ=subinstr(marke_typ,"alpine","alpina",.)
replace marke_typ=subinstr(marke_typ,"land rover","land-rover",.)
replace marke_typ=subinstr(marke_typ,"landrover","land-rover",.)
replace marke_typ=subinstr(marke_typ,"rolls royce","rolls-royce",.)
replace marke_typ=subinstr(marke_typ,"aston martin","aston-martin",.)
replace marke_typ=subinstr(marke_typ,"ë","e",.)
replace marke_typ=subinstr(marke_typ,"é","e",.)
replace marke_typ=subinstr(marke_typ,"quattro audi","audi",.)
replace marke_typ=subinstr(marke_typ,"range rover","land-rover",.)
replace marke_typ=subinstr(marke_typ,"alfa-romeo alfa-romeo","alfa-romeo",.)
replace marke_typ=subinstr(marke_typ,"alfa-romeo alfa","alfa-romeo",.)

* --- Punctuation, abbreviations and suffixes -------------------------------
replace marke_typ=subinstr(marke_typ,")","",.)
replace marke_typ=subinstr(marke_typ,"(","",.)
replace marke_typ=subinstr(marke_typ,"qu.","quattro",.)
replace marke_typ=subinstr(marke_typ,"-cng-technik","",.)
replace marke_typ=subinstr(marke_typ,"-dangel","",.)

* --- Model names -----------------------------------------------------------
replace marke_typ=subinstr(marke_typ,"stelvio","stelvio ",.)
replace marke_typ=subinstr(marke_typ,"giulia","giulia ",.)
replace marke_typ=subinstr(marke_typ,"discovery","discovery ",.)
replace marke_typ=subinstr(marke_typ,"discov.","discovery ",.)
replace marke_typ=subinstr(marke_typ,"disco.","discovery ",.)
replace marke_typ=subinstr(marke_typ,"octavia","octavia ",.)
* The hev rule also hits the hev inside chevrolet and turns it into
* c hevrolet; a dedicated rule further down puts chevrolet back together.
replace marke_typ=subinstr(marke_typ,"hev"," hev",.)
replace marke_typ=subinstr(marke_typ,"spacetourer","spacetourer ",.)
replace marke_typ=subinstr(marke_typ,"rangerover","rangerover ",.)
replace marke_typ=subinstr(marke_typ,"grandc-max","grandc-max ",.)
replace marke_typ=subinstr(marke_typ,"vito","vito ",.)
replace marke_typ=subinstr(marke_typ,"grandcherokee","gr.cherokee ",.)
replace marke_typ=subinstr(marke_typ,"grand cherokee","gr.cherokee ",.)
replace marke_typ=subinstr(marke_typ,"baleno","baleno ",.)
replace marke_typ=subinstr(marke_typ, "grand vitara","vitara",.)
replace marke_typ=subinstr(marke_typ, "gr. vitara","vitara",.)
replace marke_typ=subinstr(marke_typ, "grd vitara","vitara",.)
replace marke_typ=subinstr(marke_typ, "grd.vitara","vitara",.)
replace marke_typ=subinstr(marke_typ, "vitara","vitara ",.)
replace marke_typ=subinstr(marke_typ, "forester","forester ",.)
replace marke_typ=subinstr(marke_typ, "rav4","rav-4",.)
replace marke_typ=subinstr(marke_typ, "mokka-x","mokka",.)
* Repairs the split caused by the hev rule above.
replace marke_typ=subinstr(marke_typ, "c hevrolet","chevrolet",.)
replace marke_typ=subinstr(marke_typ, "citroen ds","ds ds",.)
replace marke_typ=subinstr(marke_typ, "jumpy spacetourer","spacetourer",.)
replace marke_typ=subinstr(marke_typ, "jumpyspacet","spacetourer",.)
replace marke_typ=subinstr(marke_typ, "jumper","spacetourer",.)
replace marke_typ=subinstr(marke_typ, "doblo","doblò",.)
replace marke_typ=subinstr(marke_typ, "grand c-max","grc-max",.)
replace marke_typ=subinstr(marke_typ, "grandc-max","grc-max",.)
replace marke_typ=subinstr(marke_typ, "i30n","i30",.)
replace marke_typ=subinstr(marke_typ, "i30w","i30",.)
replace marke_typ=subinstr(marke_typ, "tuscon","tucson",.)
replace marke_typ=subinstr(marke_typ, "q30s","q30",.)
replace marke_typ=subinstr(marke_typ, "fiesta","fiesta ",.)
replace marke_typ=subinstr(marke_typ, "grand santa","santa ",.)
replace marke_typ=subinstr(marke_typ, "grandsanta","santa ",.)
replace marke_typ=subinstr(marke_typ, "rrsport","rr sport ",.)
replace marke_typ=subinstr(marke_typ, "ceed's","ceed",.)

* --- Letter/number model codes: insert a blank between class and number ----
replace marke_typ=subinstr(marke_typ, "b180d","b 180d",.)
replace marke_typ=subinstr(marke_typ, "b220d","b 220d",.)
replace marke_typ=subinstr(marke_typ, "c200","c 200",.)
replace marke_typ=subinstr(marke_typ, "c220","c 220",.)
replace marke_typ=subinstr(marke_typ, "c43","c 43",.)
replace marke_typ=subinstr(marke_typ, "cla200","cla 200",.)
replace marke_typ=subinstr(marke_typ, "cla250","cla 250",.)
replace marke_typ=subinstr(marke_typ, "cla45","cla 45",.)
replace marke_typ=subinstr(marke_typ, "e200d","e 200d",.)
replace marke_typ=subinstr(marke_typ, "e220","e 220",.)
replace marke_typ=subinstr(marke_typ, "g500","g 500",.)
replace marke_typ=subinstr(marke_typ, "gla200","gla 200",.)
replace marke_typ=subinstr(marke_typ, "gla220","gla 220",.)
replace marke_typ=subinstr(marke_typ, "gla250","gla 250",.)

* --- Make-specific variants collapsed onto a base name ---------------------
replace marke_typ=subinstr(marke_typ, "mini 3door","mini ",.)
replace marke_typ=subinstr(marke_typ, "mini 5door","mini ",.)
replace marke_typ=subinstr(marke_typ, "mini cabrio","mini ",.)
replace marke_typ=subinstr(marke_typ, "mini jcw","mini ",.)
replace marke_typ=subinstr(marke_typ, "mini john","mini ",.)
replace marke_typ=subinstr(marke_typ, "mitsubishi mitsubishi","mitsubishi ",.)
replace marke_typ=subinstr(marke_typ, "mitsubishi outland.","mitsubishi outlander ",.)
replace marke_typ=subinstr(marke_typ, "qasqai","qashqai ",.)
replace marke_typ=subinstr(marke_typ, "x- trail","x-trail ",.)
replace marke_typ=subinstr(marke_typ, "crosslandx","crossland ",.)
replace marke_typ=subinstr(marke_typ, "expert traveller","traveller ",.)
replace marke_typ=subinstr(marke_typ, "experttr.","traveller ",.)
replace marke_typ=subinstr(marke_typ, "alhambra","alhambra ",.)
replace marke_typ=subinstr(marke_typ, "cupra ateca","ateca ",.)
replace marke_typ=subinstr(marke_typ, "citigo","citigo ",.)
replace marke_typ=subinstr(marke_typ, "superbc","superb ",.)
replace marke_typ=subinstr(marke_typ, "superd","superb ",.)
replace marke_typ=subinstr(marke_typ, "smart cabrio","smart ",.)
replace marke_typ=subinstr(marke_typ, "smart eq","smart ",.)
replace marke_typ=subinstr(marke_typ, "impreza","impreza ",.)
replace marke_typ=subinstr(marke_typ, "legacy outback","outback ",.)
replace marke_typ=subinstr(marke_typ, "levorg","levorg ",.)
* NOTE(review): this rule relabels wrx as xv, a different model string, rather
* than just fixing the spelling. Confirm that this mapping is intended.
replace marke_typ=subinstr(marke_typ, "subaru wrx","subaru xv ",.)
replace marke_typ=subinstr(marke_typ, "subaru xv","subaru xv ",.)
replace marke_typ=subinstr(marke_typ, "sx4s-cross","sx4 s-cross ",.)
replace marke_typ=subinstr(marke_typ, "audi a 4","audi a4 ",.)
replace marke_typ=subinstr(marke_typ, "vantage v8","v8 vantage ",.)
* NOTE(review): the bmw 218 and bmw 318 rules match any string that starts
* with these digits. A value already spelled bmw 218d becomes bmw 218d d, and
* bmw 218i becomes bmw 218d i (only the 218i case is repaired at the end of
* this section). Confirm this is acceptable for the downstream matching.
replace marke_typ=subinstr(marke_typ, "bmw 218","bmw 218d ",.)
replace marke_typ=subinstr(marke_typ, "bmw 225e","bmw 225xe ",.)
replace marke_typ=subinstr(marke_typ, "bmw 318","bmw 318d ",.)
replace marke_typ=subinstr(marke_typ, "bmw 323 i","bmw 323i ",.)
replace marke_typ=subinstr(marke_typ, "bmw 520 ","bmw 520d ",.)
replace marke_typ=subinstr(marke_typ, "fiat 500 x ","fiat 500x ",.)
replace marke_typ=subinstr(marke_typ, "fiat 500c ","fiat 500 ",.)
replace marke_typ=subinstr(marke_typ,"land-rover land-rover","land-rover",.)
replace marke_typ=subinstr(marke_typ,"mazda 3sport","mazda 3 sport",.)
replace marke_typ=subinstr(marke_typ,"mercedes a200","mercedes a 200",.)
replace marke_typ=subinstr(marke_typ,"volvo xc 90","volvo xc90",.)
replace marke_typ=subinstr(marke_typ,"volvo xc 40","volvo xc40",.)
replace marke_typ=subinstr(marke_typ,"passatv","passat",.)
replace marke_typ=subinstr(marke_typ,"conti.","continental",.)
* Undoes the side effect of the bmw 218 rule on 218i models.
replace marke_typ=subinstr(marke_typ,"bmw 218d i","bmw 218i",.)
replace marke_typ=subinstr(marke_typ,"bmw i3s","bmw i3",.)

* Check that tgcode uniquely identifies the rows; isid stops the do-file with
* an error if it does not.
isid tgcode




* Save the cleaned TARGA data, overwriting any existing file of that name.
* NOTE(review): the derived file is written into the Data/Original folder next
* to the raw inputs; confirm that this location is intended.
save "$root/Data/Original/TARGA.dta", replace
